{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd # need this \n",
    "import datetime as dt\n",
    "import numpy as np\n",
    "import requests, io\n",
    "import os\n",
    "\n",
    "pd.set_option('display.max_colwidth', 50)\n",
    "# set to -1 to get it do display everything"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook will grab and manipulate the Chinese tariff data as reported by Bown, Jung,  and Zhang PIIE article here:\n",
    "\n",
    "https://www.piie.com/blogs/trade-and-investment-policy-watch/trump-has-gotten-china-lower-its-tariffs-just-toward\n",
    "\n",
    "This will provide an updated tariff series that starts from the mfn values, then incorperates Chinese retaliation against the US in addition to the adjustments that China made on other products. Here is a blog post I found helpfull describing some of the issues:\n",
    "\n",
    "https://www.china-briefing.com/news/new-tariff-export-duty-cuts-china-2019-wide-variety-products-affected/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "### Step 1\n",
    "\n",
    "Here we will readin the data and do some simple cleaning..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "#url = \"https://www.piie.com/system/files/documents/bown-jung-zhang-2019-06-12.xlsx\"\n",
    "\n",
    "location = \"https://github.com/mwaugh0328/consumption_and_tradewar/raw/master/data/bown-jung-zhang-2019-06-12.xlsx\"\n",
    "# This is the link to the blog post that has all the changes in the tariffs\n",
    "# and the article by Bown , Jung  and Zhang \n",
    "\n",
    "df_tariffs = pd.read_excel(location, sheet_name = \"China Tariff Rates\", dtype = {\"hs10\": str})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "First thing I want to do is to rename the column to have just the dates so I can eventually have a time series by product of the tariffs. This is what I do below."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "cnames = df_tariffs.columns.tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "time_dict = [dt.datetime(2018,1,1), dt.datetime(2018,4,2), dt.datetime(2018,5,1), dt.datetime(2018,7,1), \n",
    "             dt.datetime(2018,7,6), dt.datetime(2018,8,23), dt.datetime(2018,9,24), dt.datetime(2018,11,1),\n",
    "             dt.datetime(2019,1,1), dt.datetime(2019,1,2), dt.datetime(2019,6,1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "tariff_times = dict(zip(cnames[2:], time_dict)) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_tariffs.rename(columns = tariff_times, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This will create teh 8 and 6 digit codes\n",
    "\n",
    "df_tariffs[\"hs8\"] = df_tariffs.hs10.str[0:8]\n",
    "\n",
    "df_tariffs[\"hs6\"] = df_tariffs.hs10.str[0:6]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hs10</th>\n",
       "      <th>description</th>\n",
       "      <th>2018-01-01 00:00:00</th>\n",
       "      <th>2018-04-02 00:00:00</th>\n",
       "      <th>2018-05-01 00:00:00</th>\n",
       "      <th>2018-07-01 00:00:00</th>\n",
       "      <th>2018-07-06 00:00:00</th>\n",
       "      <th>2018-08-23 00:00:00</th>\n",
       "      <th>2018-09-24 00:00:00</th>\n",
       "      <th>2018-11-01 00:00:00</th>\n",
       "      <th>2019-01-01 00:00:00</th>\n",
       "      <th>2019-01-02 00:00:00</th>\n",
       "      <th>2019-06-01 00:00:00</th>\n",
       "      <th>hs8</th>\n",
       "      <th>hs6</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0101210010</td>\n",
       "      <td>Live horses, asses, mules and hinnies: Horses:...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>01012100</td>\n",
       "      <td>010121</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0101210090</td>\n",
       "      <td>Live horses, asses, mules and hinnies: Horses:...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>01012100</td>\n",
       "      <td>010121</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0101290010</td>\n",
       "      <td>Live horses, asses, mules and hinnies: Horses:...</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>01012900</td>\n",
       "      <td>010129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0101290090</td>\n",
       "      <td>Live horses, asses, mules and hinnies: Horses:...</td>\n",
       "      <td>10.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>10</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>01012900</td>\n",
       "      <td>010129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>0101301010</td>\n",
       "      <td>Live horses, asses, mules and hinnies: Asses: ...</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>01013010</td>\n",
       "      <td>010130</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         hs10                                        description  \\\n",
       "0  0101210010  Live horses, asses, mules and hinnies: Horses:...   \n",
       "1  0101210090  Live horses, asses, mules and hinnies: Horses:...   \n",
       "2  0101290010  Live horses, asses, mules and hinnies: Horses:...   \n",
       "3  0101290090  Live horses, asses, mules and hinnies: Horses:...   \n",
       "4  0101301010  Live horses, asses, mules and hinnies: Asses: ...   \n",
       "\n",
       "   2018-01-01 00:00:00  2018-04-02 00:00:00  2018-05-01 00:00:00  \\\n",
       "0                  0.0                    0                  0.0   \n",
       "1                  0.0                    0                  0.0   \n",
       "2                 10.0                    0                  0.0   \n",
       "3                 10.0                    0                  0.0   \n",
       "4                  0.0                    0                  0.0   \n",
       "\n",
       "   2018-07-01 00:00:00  2018-07-06 00:00:00  2018-08-23 00:00:00  \\\n",
       "0                  0.0                    0                    0   \n",
       "1                  0.0                    0                    0   \n",
       "2                  0.0                    0                    0   \n",
       "3                  0.0                    0                    0   \n",
       "4                  0.0                    0                    0   \n",
       "\n",
       "   2018-09-24 00:00:00  2018-11-01 00:00:00  2019-01-01 00:00:00  \\\n",
       "0                    0                  0.0                  0.0   \n",
       "1                    0                  0.0                  0.0   \n",
       "2                   10                  0.0                  0.0   \n",
       "3                   10                  0.0                  0.0   \n",
       "4                    0                  0.0                  0.0   \n",
       "\n",
       "   2019-01-02 00:00:00  2019-06-01 00:00:00       hs8     hs6  \n",
       "0                    0                    0  01012100  010121  \n",
       "1                    0                    0  01012100  010121  \n",
       "2                    0                   15  01012900  010129  \n",
       "3                    0                   15  01012900  010129  \n",
       "4                    0                    0  01013010  010130  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_tariffs.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "### Step 2\n",
    "\n",
    "Now I will melt the dataframe to make it long. Groupby hs10 code and take the cummulative sum of the \"value\" in the dataframe. This will give the level of the tariff rate and how it changes by time."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df_tariffs.melt(id_vars = [\"hs10\", \"description\", \"hs6\", \"hs8\"])\n",
    "\n",
    "df.rename(columns = {\"variable\": \"time_of_tariff\"}, inplace = True)\n",
    "\n",
    "df['tariff'] = df.groupby(['hs10'])['value'].apply(lambda x: x.cumsum())\n",
    "# This is one of those fancy/amazing pandas things...group by hs10, graby the value, apply the function cum sum.\n",
    "# So what this is doing is for each hs10 grouping take the cummulative sum of the tariff value"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then lets grab this to see what it looks like..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>hs10</th>\n",
       "      <th>description</th>\n",
       "      <th>hs6</th>\n",
       "      <th>hs8</th>\n",
       "      <th>time_of_tariff</th>\n",
       "      <th>value</th>\n",
       "      <th>tariff</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>0101290010</td>\n",
       "      <td>Live horses, asses, mules and hinnies: Horses:...</td>\n",
       "      <td>010129</td>\n",
       "      <td>01012900</td>\n",
       "      <td>2018-01-01</td>\n",
       "      <td>10.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>0101290090</td>\n",
       "      <td>Live horses, asses, mules and hinnies: Horses:...</td>\n",
       "      <td>010129</td>\n",
       "      <td>01012900</td>\n",
       "      <td>2018-01-01</td>\n",
       "      <td>10.0</td>\n",
       "      <td>10.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         hs10                                        description     hs6  \\\n",
       "2  0101290010  Live horses, asses, mules and hinnies: Horses:...  010129   \n",
       "3  0101290090  Live horses, asses, mules and hinnies: Horses:...  010129   \n",
       "\n",
       "        hs8 time_of_tariff  value  tariff  \n",
       "2  01012900     2018-01-01   10.0    10.0  \n",
       "3  01012900     2018-01-01   10.0    10.0  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hs_grp = df.groupby([\"hs6\", \"time_of_tariff\"])\n",
    "\n",
    "hs_grp.get_group((\"010129\", dt.datetime(2018,1,1)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "### Step 3\n",
    "\n",
    "Choices. Now the issue is that the mfn tariffs are at the 10 digit level, but to match this thing up with US exports in a consistent way, we need to go to the 6 digit. My understanding is that across countries, only up to the 6 digit are things consistent (in fact you can see this when tyring to merge US exports with the tariffs at the 10 digit) level. \n",
    "\n",
    "So the solution will be the following. We will try a couple of different aggregations and see if it matters. Note that it appears that the Tariff Retaliation was at the 6 digit level (as there is no variation in tariffs across products within the 6 digit level). What variation this is missing is the initial level which (sometimes) does vary within products. \n",
    "\n",
    "**Updated** however you groupby does not matter for final results. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "tariffs_hs6_max = df.groupby([\"hs6\", \"time_of_tariff\"]).agg({\"tariff\": \"max\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "tariffs_hs6_max.reset_index(inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "tariffs_hs6_max.set_index(\"time_of_tariff\", inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the 2019, 1, 1 observation as the 2019,1,2, supercedes it. \n",
    "\n",
    "tariffs_hs6_max.drop(labels=dt.datetime(2019,1,1), axis = 0, inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "tariffs_hs6_max.reset_index(inplace = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time_of_tariff</th>\n",
       "      <th>hs6</th>\n",
       "      <th>tariff</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>2018-01-01</td>\n",
       "      <td>010121</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>2018-04-02</td>\n",
       "      <td>010121</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>2018-05-01</td>\n",
       "      <td>010121</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>2018-07-01</td>\n",
       "      <td>010121</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>2018-07-06</td>\n",
       "      <td>010121</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>2018-08-23</td>\n",
       "      <td>010121</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>2018-09-24</td>\n",
       "      <td>010121</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>7</td>\n",
       "      <td>2018-11-01</td>\n",
       "      <td>010121</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>8</td>\n",
       "      <td>2019-01-02</td>\n",
       "      <td>010121</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>9</td>\n",
       "      <td>2019-06-01</td>\n",
       "      <td>010121</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  time_of_tariff     hs6  tariff\n",
       "0     2018-01-01  010121     0.0\n",
       "1     2018-04-02  010121     0.0\n",
       "2     2018-05-01  010121     0.0\n",
       "3     2018-07-01  010121     0.0\n",
       "4     2018-07-06  010121     0.0\n",
       "5     2018-08-23  010121     0.0\n",
       "6     2018-09-24  010121     0.0\n",
       "7     2018-11-01  010121     0.0\n",
       "8     2019-01-02  010121     0.0\n",
       "9     2019-06-01  010121     0.0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tariffs_hs6_max.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "location = os.getcwd()\n",
    "\n",
    "tariffs_hs6_max.to_csv(location + \"\\\\data\"+ \"\\\\new_tariff_list_max.csv\",index = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
